import time
from selenium import webdriver
from scrapy.selector import Selector
from selenium.webdriver.chrome.options import Options
import re
import requests
from user_agent import generate_user_agent
from config import *
from utils.crawler import baidu_parse, google_parse, getHTMLText

def get_page(search_list):
    """Submit each query on Google's home page in headless Chrome and collect the HTML.

    Args:
        search_list: Iterable of indexable items. For every item the first
            two elements are concatenated (``item[0] + item[1]``) and typed
            into the search box.

    Returns:
        A list holding one ``driver.page_source`` string per query, in
        input order.
    """
    page_list = []

    # Run Chrome headless, i.e. without opening a visible browser window.
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    # NOTE(review): ``chrome_options=`` and ``find_element_by_xpath`` are the
    # Selenium 3 spelling and are not available in recent Selenium 4
    # releases -- confirm against the installed Selenium version.
    driver = webdriver.Chrome(chrome_options=chrome_options)
    # To watch the browser while debugging use: driver = webdriver.Chrome()

    try:
        for count, query in enumerate(search_list):
            # Progress counter.
            print(count)
            driver.get('https://www.google.com/')
            # Type the query into the search box, then press the search button.
            driver.find_element_by_xpath('//*[@id="tsf"]/div[2]/div[1]/div[1]/div/div[2]/input').send_keys(query[0] + query[1])
            driver.find_element_by_xpath('//*[@id="tsf"]/div[2]/div[1]/div[3]/center/input[1]').click()
            page_list.append(driver.page_source)
    finally:
        # Always shut the browser down, even when a lookup fails, so no
        # headless Chrome process is left running.
        driver.quit()
    return page_list

def google_parse(html):
    """Extract search hits from Google result-page HTML.

    Args:
        html: Either a list of HTML page strings or a single HTML string
            (a single string is treated as one page rather than being
            iterated character by character).

    Returns:
        A list with exactly one entry per input page, in input order. Each
        entry is a list of dicts with the keys ``'url'``, ``'content'`` and
        ``'title'``; a page without usable hits yields an empty list.
    """
    pages = [html] if isinstance(html, str) else html

    res = []
    for page_html in pages:
        rs = []
        page = Selector(text=page_html)
        for ans in page.css('div.g'):
            hrefs = ans.css('*.r a::attr(href)').extract()
            if not hrefs:
                # Result block without a link: nothing to record.
                continue
            # Keep the href from its first "http" onwards.
            found = re.findall('(http.*)', hrefs[0])
            if not found:
                continue
            rs.append({
                # Drop everything from the first '&' onwards.
                'url': re.sub('&.*', '', found[0]),
                'content': ''.join(ans.css('span.st').css('*::text').extract()),
                'title': ''.join(ans.css('h3').css('*::text').extract()),
            })
        # Append once per page; appending inside the hit loop would add the
        # same list object repeatedly.
        res.append(rs)
    return res

def get_google_page(i, query='JieTang'):
    """Fetch one Google results page, parse it and print the parsed hits.

    Args:
        i: Index into ``google_url``; the selected entry is inserted after
            ``'https://www.google.'`` to form the host name.
            (``google_url`` is not defined in this file -- presumably it
            comes from ``config``; verify.)
        query: Search term placed in the ``q`` URL parameter. Defaults to
            ``'JieTang'``, the value that used to be hard-coded.

    Returns:
        Whatever ``google_parse`` returns for the fetched page; the same
        value is also printed.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote_plus

    url = 'https://www.google.' + google_url[i] + '/search?q=' + quote_plus(query) + '&hl=zh-CN'
    html = getHTMLText(url)

    # google_parse iterates over pages, so hand a single HTML string over as
    # a one-page list instead of letting it be walked character by character.
    pages = [html] if isinstance(html, str) else html
    page_info = google_parse(pages)
    print(page_info)
    print("================================")
    return page_info

if __name__ == '__main__':
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed time. ``start`` is only consumed by
    # the optional timing report sketched at the bottom of this block.
    start = time.perf_counter()

    # Query every configured Google domain in turn.
    for i in range(len(google_url)):
        get_google_page(i)

    # Alternative flow: drive a real browser through Selenium instead.
    # search_list = [
    #     ["jie tang", "Tsinghua University"], ["qingyu wu", "Tsinghua University"], ["qingyu wu", "Tsinghua University"],
    #     ["si yuan", "Tsinghua University"], ["yi qian", "Tsinghua University"], ["li shi", "Tsinghua University"]
    # ]
    # res = google_parse(get_page(search_list))
    # for i in res:
    #     print(i)
    #
    # Optional timing report:
    # end = time.perf_counter()
    # print('Running time: %s Seconds'%(end-start))
